Source of data and the following data description: https://archive.ics.uci.edu/ml/datasets/GPS+Trajectories.

Abstract: The dataset has been fed by an Android app called Go!Track. It is available on the Google Play Store.

STEP 0: Load Libraries

STEP 1: Import relevant data set.

Get the GPS Activity Data

Source of data and the following data description: https://archive.ics.uci.edu/ml/datasets/GPS+Trajectories.

Data Set Information:

The dataset is composed of two tables. The first table, go_track_tracks, presents general attributes; each instance has one trajectory, which is represented by the table go_track_trackspoints.

Attribute Information:

  1. go_track_tracks.csv: a list of trajectories
  • id_android - it represents the device used to capture the instance;
  • speed - it represents the average speed (Km/H)
  • distance - it represents the total distance (Km)
  • rating - it is an evaluation parameter. Evaluating the traffic is a way to verify the volunteers' perception of the traffic during the travel; in other words, if volunteers move to some place and face a traffic jam, maybe they will evaluate it as ‘bad’. (3- good, 2- normal, 1-bad).
  • rating_bus - it is another evaluation parameter. (1 - The amount of people inside the bus is little, 2 - The bus is not crowded, 3- The bus is crowded).
  • rating_weather - it is another evaluation parameter. ( 2- sunny, 1- raining).
  • car_or_bus - (1 - car, 2-bus)
  • linha - information about the bus that does the pathway
  2. go_track_trackspoints.csv: localization points of each trajectory
  • id: unique key to identify each point
  • latitude: latitude from where the point is
  • longitude: longitude from where the point is
  • track_id: identifies the trajectory to which the point belongs
  • time: datetime when the point was collected (GMT-3)
## Load the data
# Track-level table (a list of trajectories); text columns stay character.
dfTracks <- read.csv(
  file = "./GPS_Trajectory/go_track_tracks.csv",
  stringsAsFactors = FALSE
)
# Point-level table (localization points of each trajectory); text columns
# stay character as well.
dfPoints <- read.csv(
  file = "./GPS_Trajectory/go_track_trackspoints.csv",
  stringsAsFactors = FALSE
)

Clean Data

Check for NA values and richness of ‘tracks’.

# Preview the first six rows of the tracks table as a formatted table.
kable(head(dfTracks, n = 6L))
id id_android speed time distance rating rating_bus rating_weather car_or_bus linha
1 0 19.210586 0.1380489 2.652 3 0 0 1
2 0 30.848229 0.1714847 5.290 3 0 0 1
3 1 13.560101 0.0676986 0.918 3 0 0 2
4 1 19.766679 0.3895444 7.700 3 0 0 2
8 0 25.807401 0.1548006 3.995 2 0 0 1
10 2 1.346913 0.0066819 0.009 2 0 0 1
# Preview the first six rows of the points table as a formatted table.
kable(head(dfPoints, n = 6L))
id latitude longitude track_id time
1 -10.93934 -37.06274 1 2014-09-13 07:24:32
2 -10.93934 -37.06274 1 2014-09-13 07:24:37
3 -10.93932 -37.06276 1 2014-09-13 07:24:42
4 -10.93921 -37.06284 1 2014-09-13 07:24:47
5 -10.93894 -37.06288 1 2014-09-13 07:24:53
6 -10.93854 -37.06284 1 2014-09-13 07:24:59
# Rename 'id' to 'index' in dfPoints to alleviate confusion; for this file the
# key we want is 'track_id', which becomes 'id' so both tables share it.
dfPoints <- dfPoints %>% rename(index = id, id = track_id)

# Order the two data frames by the shared key.
dfPoints <- dfPoints %>% arrange(id)
dfTracks <- dfTracks %>% arrange(id)

# Check which ids do not align between the two tables. The comparison is
# set-based rather than positional: an elementwise `==` on the two unique()
# vectors recycles (with a warning) when their lengths differ, and a single
# missing id would misreport every position after it.
trackIds <- unique(dfTracks$id)
pointIds <- unique(dfPoints$id)
unmatchedIds <- c(setdiff(trackIds, pointIds), setdiff(pointIds, trackIds))
# The ids present in only one of the two tables (empty when they align).
unmatchedIds
## integer(0)
# How many such ids there are.
length(unmatchedIds)
## [1] 0
# unique(dfTracks$id)
# unique(dfPoints$track_id)
# summary(dfTracks)
# summary(dfPoints)

STEP 2. Aggregate data by country (or other location).

With the respective IDs aligned between two df’s, join the two together

# Join on 'id': every GPS point picks up the attributes of its track.
df <- dfPoints %>%
  left_join(dfTracks, by = "id")
rm(dfPoints, dfTracks)

# Keep only the columns needed for mapping, under shorter names. Then parse
# the timestamp and shift user from 0-27 to 1-28 as a categorical (factor).
df <- df %>%
  select(id,
         user = id_android,
         lat = latitude,
         lon = longitude,
         date_time = time.x) %>%
  mutate(date_time = ymd_hms(date_time),
         user = as.factor(user + 1))

dim(df)
## [1] 18107     5
kable(head(df))
id user lat lon date_time
1 1 -10.93934 -37.06274 2014-09-13 07:24:32
1 1 -10.93934 -37.06274 2014-09-13 07:24:37
1 1 -10.93932 -37.06276 2014-09-13 07:24:42
1 1 -10.93921 -37.06284 2014-09-13 07:24:47
1 1 -10.93894 -37.06288 2014-09-13 07:24:53
1 1 -10.93854 -37.06284 2014-09-13 07:24:59

STEP 3. Merge data from data set to map data.

Find the range of Lat and Lon

# Overall longitude / latitude extent of the joined data.
range(df$lon)
## [1] -48.63292 -36.49336
range(df$lat)
## [1] -27.60317 -10.29284

# One HTML hover label per GPS point (user, date, time of day).
# Built in a single vectorised sprintf() call rather than a row-by-row
# lapply(): indexing the data frame once per row is needlessly slow, and
# seq(nrow(df)) would iterate over c(1, 0) if the data frame were empty.
# sprintf() returns a zero-length vector in that case.
labs <- sprintf(
  "<p><b>USER: </b>%s</p><p><b>DATE: </b>%s</p><p><b>TIME: </b>%s</p>",
  as.character(df$user),
  format(df$date_time, format = "%d %b %y"),
  format(df$date_time, format = "%H:%M")
)

# All points in red on an OpenStreetMap base layer, centred on the mean
# coordinate of the data.
leafMap <- df %>%
  leaflet() %>%
  addTiles() %>%
  setView(lat = mean(df$lat, na.rm = TRUE),
          lng = mean(df$lon, na.rm = TRUE),
          zoom = 6) %>%
  addCircleMarkers(lng = ~lon, lat = ~lat,
                   color = "red", opacity = 1, radius = 0.2,
                   # Wrap each label in HTML() so the tags are rendered.
                   label = lapply(labs, htmltools::HTML))
leafMap

STEP 4. Create the plot(s).

The map below colours each GPS point by user.

# Distinct users present in the data; used as the palette domain and as the
# names of the toggleable layer groups.
user_id <- unique(df$user)

# brewer.pal() accepts only 3 <= n <= 8 for "Dark2" and warns outside that
# range, so clamp the request. colorFactor() interpolates when the number of
# users differs from the number of base colours.
# Other qualitative palettes: Accent, Dark2, Paired, Pastel1, Pastel2, Set1,
# Set2, Set3.
n_colors <- min(max(length(user_id), 3L), 8L)
user_pal <- colorFactor(brewer.pal(n = n_colors, name = "Dark2"), user_id)

# Points coloured by user. Each marker is assigned to a group named after its
# user; without that assignment the layers control lists groups that contain
# no markers and its checkboxes have no effect.
leafMap2 <- df %>%
  leaflet() %>%
  addTiles() %>%
  setView(lat = mean(df$lat, na.rm = TRUE),
          lng = mean(df$lon, na.rm = TRUE),
          zoom = 12) %>%
  addCircleMarkers(lng = ~lon, lat = ~lat,
                   color = ~user_pal(user), opacity = 1, radius = 0.15,
                   group = ~as.character(user),
                   label = lapply(labs, htmltools::HTML)) %>%
  # addPolylines(lat = ~lat, lng = ~lon) %>%
  addLayersControl(overlayGroups = as.character(user_id))
leafMap2

ggmap Approach

STEP 4. Create the plot(s).

# One row per distinct (user, calendar day) pair.
# The day is extracted through an ISO "%Y-%m-%d" string, which is independent
# of the session locale and keeps the full four-digit year (the previous
# "%d %b %y" round trip relied on locale month abbreviations and a two-digit
# year).
dfDup <- data.frame(
  user = df$user,
  date_time = as.Date(format(df$date_time, format = "%Y-%m-%d"))
)
dfDup <- unique(dfDup)
# Which dates appear for more than one user
dfDup$date_time[duplicated(dfDup$date_time)]
##  [1] "2014-10-08" "2014-11-28" "2015-02-23" "2015-03-02" "2015-04-24"
##  [6] "2015-05-19" "2015-05-19" "2015-05-20" "2015-05-22" "2015-05-28"
## [11] "2015-05-28" "2015-05-29" "2015-05-29" "2015-06-03" "2015-06-03"
# Keep only the points recorded on 29 May 2015, ordered by user and time so
# that each user's points form a chronological path.
dfplot <- df %>%
  filter(format(date_time, format = "%m-%d-%Y") == "05-29-2015") %>%
  arrange(user, date_time)

# Give users 2, 11 and 13 readable names, then rebuild the factor from its
# labels so unused levels are dropped and the remaining ones sort
# alphabetically.
dfplot$user <- plyr::revalue(dfplot$user, c("2" = "Alison", "11" = "Rodd",
                                            "13" = "Gregory"))
dfplot$user <- as.factor(as.character(dfplot$user))

# One HTML hover label per point, built in a single vectorised call.
# sprintf() yields a zero-length result for an empty data frame, whereas
# iterating over seq(nrow(dfplot)) would visit c(1, 0).
labs <- sprintf(
  "<p><b>USER: </b>%s</p><p><b>DATE: </b>%s</p><p><b>TIME: </b>%s</p>",
  as.character(dfplot$user),
  format(dfplot$date_time, format = "%d %b %y"),
  format(dfplot$date_time, format = "%H:%M")
)

# Palette over the users present that day. brewer.pal() accepts 3 <= n <= 9
# for "Set1", so the request is clamped to that range.
# Other qualitative palettes: Accent, Dark2, Paired, Pastel1, Pastel2, Set1,
# Set2, Set3.
user_id <- unique(dfplot$user)
n_colors <- min(max(length(user_id), 3L), 9L)
user_pal <- colorFactor(brewer.pal(n = n_colors, name = "Set1"), user_id)

# Each marker joins the group named after its user so that the layers control
# can actually show or hide that user's points.
leafMap3 <- dfplot %>%
  leaflet() %>%
  addTiles() %>%
  setView(lat = mean(dfplot$lat, na.rm = TRUE),
          lng = mean(dfplot$lon, na.rm = TRUE),
          zoom = 12) %>%
  addCircleMarkers(lng = ~lon, lat = ~lat,
                   color = ~user_pal(user), opacity = 0.8, radius = 0.1,
                   group = ~as.character(user),
                   label = lapply(labs, htmltools::HTML)) %>%
  addLayersControl(overlayGroups = as.character(user_id))
leafMap3
library(ggplot2)
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
# Longitude extent of the single-day subset.
range(dfplot$lon)
## [1] -37.10016 -36.99810
# NOTE(review): this is the latitude range of the full data set `df`, not of
# `dfplot` like the longitude range above -- confirm which one was intended.
range(df$lat)
## [1] -27.60317 -10.29284
# Base map for a bounding box given as c(left, bottom, right, top).
base <- get_map(location = c(-37.1223, -11.0007, -36.9529, -10.8808), 
                maptype = "hybrid") #"hybrid" is likely googlemap; https://www.openstreetmap.org/export#map=13/-10.9408/-37.0376&layers=C
## Source : http://tile.stamen.com/terrain/13/3251/4345.png
## Source : http://tile.stamen.com/terrain/13/3252/4345.png
## Source : http://tile.stamen.com/terrain/13/3253/4345.png
## Source : http://tile.stamen.com/terrain/13/3254/4345.png
## Source : http://tile.stamen.com/terrain/13/3255/4345.png
## Source : http://tile.stamen.com/terrain/13/3251/4346.png
## Source : http://tile.stamen.com/terrain/13/3252/4346.png
## Source : http://tile.stamen.com/terrain/13/3253/4346.png
## Source : http://tile.stamen.com/terrain/13/3254/4346.png
## Source : http://tile.stamen.com/terrain/13/3255/4346.png
## Source : http://tile.stamen.com/terrain/13/3251/4347.png
## Source : http://tile.stamen.com/terrain/13/3252/4347.png
## Source : http://tile.stamen.com/terrain/13/3253/4347.png
## Source : http://tile.stamen.com/terrain/13/3254/4347.png
## Source : http://tile.stamen.com/terrain/13/3255/4347.png
# Points plus connecting paths, one colour per user. The x aesthetic is
# longitude and y is latitude, so the axis labels follow that order (they
# were previously swapped).
map3 <- ggmap(base) +
  geom_point(data = dfplot, aes(x = lon, y = lat, color = user),
             shape = 20, size = 0.25) +
  geom_path(data = dfplot, aes(x = lon, y = lat, color = user)) +
  labs(x = "Longitude", y = "Latitude", title = "3 User Tracks - 29 May 2015") +
  # Unnamed values are matched to the factor levels of `user` in level order.
  scale_color_manual(values = c("blue", "red", "purple")) +
  theme_bw()
map3
## Warning: Removed 44 rows containing missing values (geom_point).
## Warning: Removed 44 row(s) containing missing values (geom_path).